Report last updated Tue May 31 14:52:09 2016.

library(ggplot2)
library(reshape)
library(limma)
library(CHBUtils)
library(biomaRt)
library(dplyr)
library(cluster)
library(gridExtra)
library(logging)
library(org.Mm.eg.db)
library(DEGreport)
library(dplyr)
order_group=c("control", "day1", "day2", "day7", "day14")
# root_path = "~/orch/scratch/vishal_mirna_kidney/publish/FA-model"
result_files = file.path(root_path, "omics/files_publish")
dir.create(result_files, showWarnings = F, recursive = T)
basicConfig(level='INFO')

Protein analysis

FC > 4 and FDR < 1%

protein_file = file.path(root_path, "protein", "files_publish")
dir.create(protein_file, recursive = T, showWarnings = F)
counts = read.csv(file.path(root_path, "protein", "FA_Proteomics_RawData.csv"),skip = 3) %>% tidyr::separate(Protein.Id, c("sp", "Uniprot.ID", "type"),sep = "[::|::]", extra = "merge")
row.names(counts) = counts$Uniprot.ID

mapping = data.frame(id=rownames(counts))
mapping = annotate_df(mapping, "id", 'mmusculus_gene_ensembl', "uniprot_genename", "ensembl_gene_id")  %>% distinct(ensembl_gene_id)

mapping2 = data.frame(id=unique(counts$Gene.Symbol)[!is.na(unique(counts$Gene.Symbol))])
mapping2 = annotate_df(mapping2, "id", 'mmusculus_gene_ensembl', "wikigene_name", "ensembl_gene_id")  %>% distinct(ensembl_gene_id)

counts[as.character(mapping$id), "ensembl"] = mapping$ensembl_gene_id 
idx = match(as.character(mapping2$id),as.character(counts$Gene.Symbol))
counts[idx, "ensembl"] = mapping2$ensembl_gene_id
counts$symbol = convertIDs(as.character(counts$ensembl), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
save_file(counts, "counts_annotated.csv", protein_file)

counts = counts %>% distinct(ensembl) %>% filter(!is.na(ensembl))
clean_counts = counts[,7:16]
rownames(clean_counts) = counts$ensembl
names(clean_counts) = sapply(names(clean_counts), tolower)
names(clean_counts)[1:10] = c("control-1", "control-2", "day1-1", "day1-2",
                       "day2-1", "day2-2", "day7-1", "day7-2", "day14-1", "day14-2")

cat("## Expression density of 'raw' data\n\n")

Expression density of ‘raw’ data

ggplot(melt(clean_counts), aes(x=log2(value), colour=variable)) + geom_density()

dge = edgeR::DGEList(clean_counts)
dge = edgeR::calcNormFactors(dge, method="TMM")

coldata = data.frame(row.names=colnames(clean_counts), samples=colnames(clean_counts)) %>% tidyr::separate(samples,into = c("group"), extra = "drop")
coldata$group = sapply(coldata$group, tolower)
norm_counts = edgeR::cpm(dge, log=T)

MA <- normalizeBetweenArrays(as.matrix(norm_counts), method="none")

cat("## Expression density of normalize data\n\n")

Expression density of normalize data

ggplot(melt(as.data.frame(MA)), aes(x=value, colour=variable)) + geom_density()

cat("## MDS plot\n\n")

MDS plot

mds(MA, condition = coldata$group)

save_file(MA, "fa_model_log2_counts.tsv", protein_file)
design <- model.matrix(~ 0 + coldata$group)
colnames(design) = c("day1", "day14", "day2", "day7", "normal")
fit <- lmFit(MA, design)
contrast.matrix <- makeContrasts(day1-normal, day2-day1, day7-day2, day14-day7, 
                                 day2-normal, day7-normal, day14-normal,
                                 day7-day1, day7-day2, 
                                 day14-day1, day14-day2,
                                 levels=design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
res = topTable(fit2, adjust="BH", n="Inf")
save_file(res, "fa_model.tsv", protein_file)
absMaxFC = rowMax(as.matrix(abs(res[,1:11])))
sign = row.names(res[absMaxFC>2 & res$adj.P.Val<0.01,])

cat("## MDS plot of DE proteins\n\n")

MDS plot of DE proteins

mds(MA[sign,], condition = coldata$group)

coldata$group = factor(coldata$group, levels=order_group)
cat("## Clustering analysis\n\n")

Clustering analysis

clusters = degPatterns(MA[sign,], coldata, minc = 15, summarize = "group", time="group", col = NULL)

Working with 287 genes

.df=clusters$df
save_file(.df, "clusters_genes_cluster.tsv", protein_file)

3D interaction analysis

mrna_path = "rnaseq/final/2016-02-05_mrna_bio/files_publish"
mrna_matrix = read.csv(file.path(root_path, mrna_path, "fa_model_log2_counts.tsv"), row.names = 1)
mrna_clusters = read.csv(file.path(root_path, mrna_path, "clusters_genes.tsv"), row.names = 1)
mrna_col = data.frame(row.names=colnames(mrna_matrix), samples=colnames(mrna_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(mrna_col) = colnames(mrna_matrix)
keep = rownames(mrna_col)[!is.na(mrna_col$group)]
mrna_col = mrna_col[keep,,drop=F]
mrna_matrix = mrna_matrix[,keep]

prot_path = "protein/files_publish"
prot_matrix = read.csv(file.path(root_path, prot_path, "fa_model_log2_counts.tsv"), row.names = 1)
prot_clusters = read.csv(file.path(root_path, prot_path, "clusters_genes_cluster.tsv"), row.names = 1)
prot_col = data.frame(row.names=colnames(prot_matrix), samples=colnames(prot_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(prot_col) = colnames(prot_matrix)

mirna_path = "srnaseq/final/files_publish"
mirna_matrix = read.csv(file.path(root_path, mirna_path, "fa_model_mirna_log2_counts.tsv"), row.names = 1)
load(file.path(root_path, mirna_path, "ma.rda"))
mirna_col = data.frame(row.names=colnames(mirna_matrix), samples=colnames(mirna_matrix)) %>% tidyr::separate(samples,into = c("id", "group"), extra = "drop") %>% mutate(group=factor(gsub("day0", "control", tolower(group)), levels=order_group))
rownames(mirna_col) = colnames(mirna_matrix)
keep = rownames(mirna_col)[!is.na(mirna_col$group)]
mirna_col = mirna_col[keep,,drop=F]
mirna_matrix = mirna_matrix[,keep]
library(SummarizedExperiment)
library(isomiRs)
mrna_keep = as.character(mrna_clusters$genes)
prot_keep = unique(c(intersect(mrna_keep, rownames(prot_matrix)), as.character(prot_clusters$genes)))
keep = intersect(mrna_keep, rownames(prot_matrix))

df = degMerge(list(mrna=mrna_matrix[keep,], prot=prot_matrix[keep,]), 
                   list(mrna=mrna_clusters), 
                   list(mrna=mrna_col, prot=prot_col), 
                   summarize="group", 
                   time="group", col=NULL, 
                   scale=TRUE)

Integration of : mrna prot

Cluster number 1

Working with 62 genes

Cluster number 2

Working with 85 genes

Cluster number 3

Working with 34 genes

Cluster number 5

Working with 14 genes

Cluster number 6

Working with 11 genes

Cluster number 8

Working with 25 genes

Cluster number 9

Working with 13 genes

Cluster number 11

Working with 8 genes

Cluster number 14

Working with 14 genes

Cluster number 13

Working with 5 genes

Cluster number 16

Working with 5 genes

Cluster number 7

Working with 8 genes

Cluster number 27

Working with 5 genes

Cluster number 15

Working with 11 genes

Cluster number 30

Working with 6 genes

prot_merged = do.call(rbind, df)
prot_merged$name = convertIDs(as.character(prot_merged$gene), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(prot_merged) = paste0("prot_", names(prot_merged))

mi_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mirna_matrix)), colData= mirna_col)
gene_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mrna_matrix)), colData=mrna_col)
cor_tar = find_targets(mi_rse, gene_rse, ma)

Dimmension of cor matrix: 73 7548

mapping = melt(cor_tar) %>% filter(value<0)
keep = intersect(mrna_keep, as.character(unique(mapping[,1])))
mirna_keep = as.character(unique(mapping[,2]))

df = degMerge(list(mrna=mrna_matrix[keep,],
                   mirna=mirna_matrix[mirna_keep,]), 
              list(mrna=mrna_clusters), 
              list(mrna=mrna_col, mirna=mirna_col), 
              summarize="group", 
              time="group", col=NULL, 
              scale=TRUE, mapping=list(mirna=mapping))

Integration of : mrna mirna

Cluster number 1

Cluster number 2

Cluster number 3

Working with 6 genes

Cluster number 6

Working with 8 genes

Cluster number 8

Cluster number 5

Working with 5 genes

mirna_merged = do.call(rbind, df)
mirna_merged$name = convertIDs(as.character(mirna_merged$pair), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(mirna_merged) = paste0("mirna_", names(mirna_merged))

merged = merge(mirna_merged[mirna_merged$mirna_cor < -.6,c(6,7,1:5)], prot_merged[prot_merged$prot_cor > .6,c(5,7,1:4)], by=1, all=TRUE)
merged_all = merge(mirna_merged[c(6,7,1:5)], prot_merged[c(5,7,1:4)], by=1, all=TRUE)

save_file(merged, "omics_filtered.csv", result_files)
save_file(merged_all, "omics.csv", result_files)
save_file(prot_merged, "prot_omics.csv", result_files)
save_file(mirna_merged, "mirna_omics.csv", result_files)

R Session Info

(useful if replicating these results)

sessionInfo()
R version 3.3.0 (2016-05-03)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux stretch/sid

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  methods   stats     graphics  grDevices utils    
[8] datasets  base     

other attached packages:
 [1] isomiRs_0.99.18            DiscriMiner_0.1-29        
 [3] SummarizedExperiment_1.2.2 GenomicRanges_1.24.0      
 [5] GenomeInfoDb_1.8.1         DEGreport_1.5.0           
 [7] quantreg_5.24              SparseM_1.7               
 [9] org.Mm.eg.db_3.3.0         AnnotationDbi_1.34.3      
[11] IRanges_2.6.0              S4Vectors_0.10.1          
[13] Biobase_2.32.0             BiocGenerics_0.18.0       
[15] logging_0.7-103            gridExtra_2.2.1           
[17] cluster_2.0.4              dplyr_0.4.3               
[19] biomaRt_2.28.0             CHBUtils_0.1              
[21] limma_3.28.5               reshape_0.8.5             
[23] ggplot2_2.1.0              knitr_1.13                
[25] myRfunctions_0.1           rmarkdown_0.9.6           
[27] BiocInstaller_1.22.2      

loaded via a namespace (and not attached):
 [1] edgeR_3.14.0        splines_3.3.0       gtools_3.5.0       
 [4] Formula_1.2-1       assertthat_0.1      latticeExtra_0.6-28
 [7] yaml_2.1.13         RSQLite_1.0.0       lattice_0.20-33    
[10] chron_2.3-47        digest_0.6.9        RColorBrewer_1.1-2 
[13] XVector_0.12.0      colorspace_1.2-6    htmltools_0.3.5    
[16] Matrix_1.2-6        plyr_1.8.3          DESeq2_1.12.3      
[19] XML_3.98-1.4        genefilter_1.54.2   zlibbioc_1.18.0    
[22] xtable_1.8-2        scales_0.4.0        gdata_2.17.0       
[25] BiocParallel_1.6.2  MatrixModels_0.4-1  annotate_1.50.0    
[28] lazyeval_0.1.10     nnet_7.3-12         survival_2.39-4    
[31] magrittr_1.5        evaluate_0.9        GGally_1.0.1       
[34] gplots_3.0.1        foreign_0.8-66      tools_3.3.0        
[37] data.table_1.9.6    formatR_1.4         stringr_1.0.0      
[40] locfit_1.5-9.1      munsell_0.4.3       caTools_1.17.1     
[43] grid_3.3.0          RCurl_1.95-4.8      labeling_0.3       
[46] bitops_1.0-6        codetools_0.2-14    gtable_0.2.0       
[49] DBI_0.4-1           R6_2.1.2            Nozzle.R1_1.1-1    
[52] Hmisc_3.17-4        KernSmooth_2.23-15  stringi_1.1.1      
[55] Rcpp_0.12.5         geneplotter_1.50.0  rpart_4.1-10       
[58] acepack_1.3-3.3     coda_0.18-1